
*****************
* MAIN ANALYSIS *
*****************
* Labussiere 2023

/* Produce the following outputs:
	In main text:
		Figure 1 a) and b)
		Tables 2 3
	
	In supplementary materials:
		Figures S6 S7
		Tables S5 S6 S7 S8 S9 S10 
*/

clear all


use full_sample.dta

* Declare data nested structure
xtset sibling_id_ma

* Identify those whose sociodemographic characteristics are not missing
mark non_missing
markout non_missing gender first_born_FULREG date_birth_cat parent_ed_level_miss ///
homeowner_alo_cito q3_st_disp_income_cito secm_ma_agg2_cito secm_pa_agg2_cito ///
country_of_birth_ma_agg type_household_cito nbr_children_cito

tab non_missing
count if non_missing
disp "Multivariate sample: N = " r(N) 
* 94,727 

* Rename variables for clarity 
gen outcome = cito_score_st_year

gen age_nat_dummies = age_nat_13_censored
label values age_nat_dummies agenat_15

gen age_nat_cat = age_nat_13_cat7
label values age_nat_cat agecat_7


* Generate dummies for categorical variables for the within-between model
tabulate age_nat_cat, generate(D_age_nat)
tabulate age_nat_dummies, generate(D_age_nat_ctn)
tabulate parent_ed_level_miss, generate(D_ed_level)
tabulate q3_st_disp_income_cito, generate(D_income)
tabulate secm_ma_agg2_cito, generate(D_ses_ma)
tabulate secm_pa_agg2_cito, generate(D_ses_pa)
tab country_of_birth_ma_agg, generate(D_country_ma)
tab date_birth_cat, generate(D_date_birth)
gen single_parent = (type_household_cito == 2)
replace single_parent = . if missing(type_household_cito)


******************************************************************
* TABLE 3
* Main random-effects model
******************************************************************
xtreg outcome ib(6).age_nat_cat i.gender i.first_born_FULREG i.date_birth_cat ///
	nbr_children_cito ib(1).type_household_cito  ///
	i.secm_ma_agg2_cito i.secm_pa_agg2_cito ///
	i.homeowner_alo_cito i.q3_st_disp_income_cito ///
	i.parent_ed_level_miss ib(6).country_of_birth_ma_agg, coeflegend
estimates store Table3

test _b[1.age_nat_cat] == _b[0.age_nat_cat]
test _b[2.age_nat_cat] == _b[1.age_nat_cat]
test _b[3.age_nat_cat] == _b[2.age_nat_cat]
* We cannot reject H0 for all 3
test _b[4.age_nat_cat] == _b[3.age_nat_cat]
* We can reject H0 at the 10% level
test _b[5.age_nat_cat] == _b[4.age_nat_cat]
* We can reject H0 at the 5% level
test _b[5.age_nat_cat] == _b[0.age_nat_cat]
* We can reject H0 at 1% level 

esttab REmodel_2_cat using Table3.tex, ///
replace unstack ti("") ///
modelwidth(6) label nogap nomtitles nonumbers  ///
nostar booktabs cells("b(fmt(3)) p(fmt(3)) ci_l(fmt(3)) ci_u(fmt(3))") r2


******************************************************************
* TABLES 2, S5 and S6
* Comparison of Random-effects with Within-between and OLS
******************************************************************
* OLS
regress outcome ib(6).age_nat_cat i.gender i.first_born_FULREG i.date_birth_cat ///
	i.parent_ed_level_miss i.homeowner_alo_cito i.q3_st_disp_income_cito ///
	i.secm_ma_agg2_cito i.secm_pa_agg2_cito i.country_of_birth_ma_agg ///
	ib(1).type_household_cito nbr_children_cito
estimates store OLSmodel_cat

* Within-between (TABLE S5 + "Test" column of TABLE 2)
capture log close
log using TableS5.smcl, replace  
xthybrid outcome D_age_nat1 D_age_nat2 D_age_nat3  						///
				 D_age_nat4 D_age_nat5 D_age_nat6 						///
		gender first_born_FULREG D_ed_level2 D_ed_level3 D_ed_level4       ///
					D_income2 D_income3 D_income4 homeowner_alo_cito    ///
		D_ses_ma2 D_ses_ma3 D_ses_ma4 D_ses_pa2 D_ses_pa3 D_ses_pa4     ///
								single_parent nbr_children_cito		    ///
			D_date_birth2 D_date_birth3 D_date_birth4 D_date_birth5     ///
			D_date_birth6 D_date_birth7 D_date_birth8 D_date_birth9     ///
			D_country_ma1 D_country_ma2 D_country_ma3 D_country_ma4     ///
			D_country_ma5 D_country_ma7 D_country_ma8 D_country_ma9     ///
if non_missing, test clusterid(sibling_id_ma) p full
estimates store MIXEDmodel_cat
log close

* Export comparison output excluding OLS (TABLE 2)
esttab Table3 MIXEDmodel_cat using Table2.rtf, ///
replace ti("Comparison all main models for age at naturalisation estimates") ///
mtitles("Model OLS" "Model RE" "Within/Between") ///
keep(0.age_nat_cat 1.age_nat_cat 2.age_nat_cat 3.age_nat_cat ///
4.age_nat_cat 5.age_nat_cat                     ///
W__D_age_nat1 W__D_age_nat2 W__D_age_nat3 W__D_age_nat4 ///
W__D_age_nat5 W__D_age_nat6                             ///
B__D_age_nat1 B__D_age_nat2 B__D_age_nat3 B__D_age_nat4 ///
B__D_age_nat5 B__D_age_nat6 						    ///
) ///
modelwidth(6) label nogap nonumbers b(3)  ///
nostar p wide

* Export comparison output including OLS (TABLE S6)
esttab Table3 OLSmodel_cat MIXEDmodel_cat using TableS6.rtf, ///
replace ti("Comparison all main models for age at naturalisation estimates") ///
mtitles("Model OLS" "Model RE" "Within/Between") ///
keep(0.age_nat_cat 1.age_nat_cat 2.age_nat_cat 3.age_nat_cat ///
4.age_nat_cat 5.age_nat_cat                     ///
W__D_age_nat1 W__D_age_nat2 W__D_age_nat3 W__D_age_nat4 ///
W__D_age_nat5 W__D_age_nat6                             ///
B__D_age_nat1 B__D_age_nat2 B__D_age_nat3 B__D_age_nat4 ///
B__D_age_nat5 B__D_age_nat6 						    ///
) ///
modelwidth(6) label nogap nonumbers b(3)  ///
nostar p wide




	********************************************
	* INTERACTIONS WITH NATURALISATION DUMMIES *
	********************************************
		
******************************************************************
* FIGURE 1 (A)
* Bar graph of predicted average test score conditionally 
* on citizenship status and parental education level 
******************************************************************
xtreg outcome i.nat_13_censored i.gender i.first_born_FULREG i.date_birth_cat ///
	ib(1).type_household_cito nbr_children_cito ///
	i.secm_ma_agg2_cito i.secm_pa_agg2_cito ///
	i.homeowner_alo_cito i.q3_st_disp_income_cito ///
	ib(1).parent_ed_level_miss ///
	ib(6).country_of_birth_ma_agg ///
	ib(1).parent_ed_level_miss##nat_13_censored
margins, at(parent_ed_level_miss=(1 2 3) nat_13_censored=(0)) post
estimates store ed_nat_0

xtreg outcome i.nat_13_censored i.gender i.first_born_FULREG i.date_birth_cat ///
	ib(1).type_household_cito nbr_children_cito ///
	i.secm_ma_agg2_cito i.secm_pa_agg2_cito ///
	i.homeowner_alo_cito i.q3_st_disp_income_cito ///
	ib(1).parent_ed_level_miss ///
	ib(6).country_of_birth_ma_agg ///
	ib(1).parent_ed_level_miss##nat_13_censored
margins, at(parent_ed_level_miss=(1 2 3) nat_13_censored=(1)) post
estimates store ed_nat_1

coefplot (ed_nat_0, label("Non-naturalised at age 14") bcolor("0 68 136")) ///
		 (ed_nat_1, label("Naturalised before age 14") bcolor("187 85 102")) ///
		 , vertical recast(bar) ciopts(recast(rcap) msize(large)) citop barwidth(0.3) ///
		 ytitle("Predicted standardised test score", height(6)) title("") ///
		 xtitle("Highest parental education level", height(5)) ///
		 xlab(1 "Low" 2 "Middle" 3 "High") yline(0, lcolor(gray) lpattern(shortdash)) ///
		 graphregion(color(white)) legend(symxsize(5))
graph export Figure1A.png, replace


******************************************************************
* TABLE S7
* Full random-effects model with interaction with parental education
******************************************************************
capture drop margins* m_* xaxis
xtreg outcome i.nat_13_censored i.gender i.first_born_FULREG i.date_birth_cat ///
	ib(1).type_household_cito nbr_children_cito ///
	i.secm_ma_agg2_cito i.secm_pa_agg2_cito ///
	i.homeowner_alo_cito i.q3_st_disp_income_cito ///
	ib(1).parent_ed_level_miss ///
	ib(6).country_of_birth_ma_agg ///
	ib(1).parent_ed_level_miss##nat_13_censored
estimates store TableS7

esttab TableS7 using TableS7.tex, ///
replace ti("Analysis interaction effect random-effects model") ///
mtitles("Model 2 with interaction") ///
scalars(sigma_u sigma_e N N_g r2_w r2_b r2_o) ///
modelwidth(6) label nogap nonumbers b(3) nostar p 


******************************************************************
* FIGURE 1 (B)
* Bar graph of predicted average test score conditionally 
* on citizenship status and parental homeownership status
******************************************************************
xtreg outcome i.nat_13_censored i.gender i.first_born_FULREG i.date_birth_cat ///
	i.homeowner_alo_cito i.q3_st_disp_income_cito ///
	i.secm_ma_agg2_cito i.secm_pa_agg2_cito ///
	ib(1).type_household_cito nbr_children_cito ///
	ib(6).country_of_birth_ma_agg ///
	ib(1).parent_ed_level_miss ///
	ib(1).parent_ed_level_miss##nat_13_censored ///
	homeowner_alo_cito##nat_13_censored 
margins, at(homeowner_alo_cito=(0 1) nat_13_censored=(0)) post
estimates store edho_nat_0

xtreg outcome i.nat_13_censored i.gender i.first_born_FULREG i.date_birth_cat ///
	i.homeowner_alo_cito i.q3_st_disp_income_cito ///
	i.secm_ma_agg2_cito i.secm_pa_agg2_cito ///
	ib(1).type_household_cito nbr_children_cito ///
	ib(6).country_of_birth_ma_agg ///
	ib(1).parent_ed_level_miss ///
	ib(1).parent_ed_level_miss##nat_13_censored ///
	homeowner_alo_cito##nat_13_censored 
margins, at(homeowner_alo_cito=(0 1) nat_13_censored=(1)) post
estimates store edho_nat_1

coefplot (edho_nat_0, label("Non-naturalised at age 14") bcolor("0 68 136")) ///
		 (edho_nat_1, label("Naturalised before age 14") bcolor("187 85 102")) ///
		 , vertical recast(bar) ciopts(recast(rcap) msize(large)) citop barwidth(0.3) ///
		 ytitle("Predicted standardised test score", height(6)) title("") ///
		 xtitle("Parents' homeownership status", height(5)) ///
		 xlab(1 "Renters" 2 "Homeowners") yline(0, lcolor(gray) lpattern(shortdash)) ///
		 graphregion(color(white)) legend(symxsize(5))
graph export Figure1B.png, replace


******************************************************************
* TABLE S9
* Full random-effects model with interactions with parental education
* and homeownership status
******************************************************************
xtreg outcome i.nat_13_censored i.gender i.first_born_FULREG i.date_birth_cat ///
	i.homeowner_alo_cito i.q3_st_disp_income_cito ///
	i.secm_ma_agg2_cito i.secm_pa_agg2_cito ///
	ib(1).type_household_cito nbr_children_cito ///
	ib(6).country_of_birth_ma_agg ///
	ib(1).parent_ed_level_miss ///
	ib(1).parent_ed_level_miss##nat_13_censored ///
	homeowner_alo_cito##nat_13_censored 
estimates store Tables9

esttab Tables9 using Tables9.tex, ///
replace ti("Analysis interaction effect random-effects model") ///
mtitles("Model 2 with interaction") ///
scalars(sigma_u sigma_e N N_g r2_w r2_b r2_o) ///
modelwidth(6) label nogap nonumbers b(3) nostar p 


	***********************************************
	* INTERACTIONS WITH NATURALISATION CATEGORIES *
	***********************************************

******************************************************************
* FIGURE S6
* Line graph of predicted average test score conditionally on  
* citizenship status and parental education level 
******************************************************************
xtreg outcome ib(6).age_nat_cat i.gender i.first_born_FULREG i.date_birth_cat ///
	ib(1).type_household_cito nbr_children_cito ///
	i.secm_ma_agg2_cito i.secm_pa_agg2_cito ///
	i.homeowner_alo_cito i.q3_st_disp_income_cito ///
	ib(1).parent_ed_level_miss ///
	ib(6).country_of_birth_ma_agg ///
	ib(1).parent_ed_level_miss##age_nat_cat
estimates store TableS8

margins age_nat_cat#parent_ed_level_miss if parent_ed_level_miss != 4
marginsplot, graphregion(color(white))  allsimplelabels ///
title("") ytitle(, height(6)) xtitle("Age at naturalisation", height(4)) ///
ytitle("Predicted standardised test score") ///
ci1opt(color("27 158 119")) plot1opts(lc("27 158 119") msymbol(d) mcolor("27 158 119")) ///
	ci2opt(color("0 68 136")) plot2opts(lc("0 68 136") msymbol(s) mcolor("0 68 136")) ///
	ci3opt(color("187 85 102")) plot3opts(lc("187 85 102") msymbol(t) mcolor("187 85 102")) 
graph export FigureS6.tif, replace width(3000)


******************************************************************
* TABLE S8
* Full random-effects model with interaction with parental education
******************************************************************
esttab TableS8 using TableS8.tex, ///
replace ti("Analysis interaction effect random-effects model") ///
mtitles("Model 2 with interaction") ///
scalars(sigma_u sigma_e N N_g r2_w r2_b r2_o) ///
modelwidth(6) label nogap nonumbers b(3) nostar p booktabs


******************************************************************
* FIGURE S7
* Line graph of predicted average test score conditionally on  
* citizenship status and parental education level + homeownership 
******************************************************************
xtreg outcome ib(6).age_nat_cat i.gender i.first_born_FULREG i.date_birth_cat ///
	ib(1).type_household_cito nbr_children_cito ///
	i.secm_ma_agg2_cito i.secm_pa_agg2_cito ///
	i.homeowner_alo_cito i.q3_st_disp_income_cito ///
	ib(1).parent_ed_level_miss ///
	ib(6).country_of_birth_ma_agg ///
	ib(1).parent_ed_level_miss##age_nat_cat ///
	i.homeowner_alo_cito##age_nat_cat
estimates store TableS10

margins age_nat_cat#homeowner_alo_cito
marginsplot, graphregion(color(white))  allsimplelabels ///
title("") ytitle(, height(6)) xtitle("Age at naturalisation", height(4)) ///
ytitle("Predicted standardised test score") ///
ciopt(color("27 158 119")) plotopts(lc("27 158 119") msymbol(d) mcolor("27 158 119")) ///
ci1opt(color("0 68 136")) plot1opts(lc("0 68 136") msymbol(t) mcolor("0 68 136")) 
graph export FigureS7.tif, replace width(3000)


******************************************************************
* TABLE S10
* Full random-effects model with interaction with parental education
* and homeownership status
******************************************************************
esttab TableS10 using TableS10.tex, ///
replace ti("Analysis interaction effect random-effects model") ///
mtitles("Model 2 with interaction") ///
scalars(sigma_u sigma_e N N_g r2_w r2_b r2_o) ///
modelwidth(6) label nogap nonumbers b(3) nostar p booktabs












